Link to Git repo: https://github.com/nktang05/kibera.git

Link to Analysis: https://nktang05.github.io/kibera/KiberaAnalysis.html

Data Cleaning

# read in data
setwd("~/Desktop/GRIT/Kibera")
data <- fread("~/Desktop/GRIT/Kibera/kibera_values_data.csv", header = TRUE)

# Columns the analysis does not need. They are listed once here and removed
# in a single pass below; a name that is not present is simply skipped.
unneeded_columns <- c(
  "StartDate", "EndDate", "Status", "IPAddress", "Progress",
  "Duration (in seconds)", "Finished", "RecordedDate", "ResponseId",
  "RecipientLastName", "RecipientFirstName", "RecipientEmail",
  "ExternalReference", "LocationLatitude", "LocationLongitude",
  "DistributionChannel", "UserLanguage",
  "2.11_7_TEXT", "2.13_7_TEXT", "2.20_5_TEXT", "3.2_8_TEXT",
  "3.16_6_TEXT", "4.21_5_TEXT", "5.1_5_TEXT", "5.12_6_TEXT"
)
for (unneeded in unneeded_columns) {
  data[[unneeded]] <- NULL
}


#summary(data)

# Keep the first row as the per-column variable labels (used later when the
# columns are labelled), in column order.
variable_labels <- as.character(unlist(data[1, ]))
# Rows 1 and 2 are not survey responses, so they are removed.
data <- data[-(1:2), ]

# Column names that start with a digit get an "x" prefix (e.g. "2.1" -> "x2.1");
# all other names are left untouched.
names(data) <- sub("^([0-9])", "x\\1", names(data))

# make var numeric
# Entries that cannot be read as numbers become NA (R emits a coercion warning).
numericVars <- c("x1.1", "x1.2", "x1.3", "x2.1", "x3.1_1_TEXT", "x3.9")
for (numeric_col in numericVars) {
  data[[numeric_col]] <- as.numeric(as.character(data[[numeric_col]]))
}
## Warning: NAs introduced by coercion
## Warning: NAs introduced by coercion
## Warning: NAs introduced by coercion
## Warning: NAs introduced by coercion
# make var date
# x1.4 is entered as day/month/year. Some entries use a two-digit year
# (e.g. "19/03/25"); "%Y" would read that literally as year 25 AD, which is
# what produced dates such as 0025-03-19 and dragged the mean date back by
# years. Entries with a two-digit year are therefore parsed with "%y"
# (which maps 00-68 to 20xx); everything else keeps the four-digit "%Y" parse.
# Values matching neither layout still become NA, as before.
raw_dates <- trimws(as.character(data$x1.4))
short_year <- grepl("^[0-9]{1,2}/[0-9]{1,2}/[0-9]{2}$", raw_dates)
parsed_dates <- as.Date(raw_dates, format = "%d/%m/%Y")
parsed_dates[short_year] <- as.Date(raw_dates[short_year], format = "%d/%m/%y")
data$x1.4 <- parsed_dates

# make var string
# The "_TEXT" write-in columns listed here are kept as plain character vectors.
charVars <- c(
  "x2.5_5_TEXT", "x2.7_6_TEXT", "x2.10_8_TEXT", "x2.12_10_TEXT",
  "x2.14_5_TEXT", "x2.16_7_TEXT", "x2.21_9_TEXT", "x3.3_7_TEXT",
  "x3.13_8_TEXT", "x3.14_7_TEXT", "x3.17_7_TEXT", "x4.10_1_TEXT",
  "x7.9_6_TEXT"
)
for (text_col in charVars) {
  data[[text_col]] <- as.character(data[[text_col]])
}

# make var factor
# Every column that is not numeric, not free text and not the date column
# (x1.4) is converted to a factor. Blank answers ("") stay as their own level.
allVars <- names(data)
excludeVars <- c(numericVars, charVars, "x1.4")
factorVars <- unique(allVars[!(allVars %in% excludeVars)])
for (coded_col in factorVars) {
  data[[coded_col]] <- as.factor(data[[coded_col]])
}

# show the final column names
names(data)
##   [1] "x1.1"          "x1.2"          "x1.3"          "x1.4"         
##   [5] "x2.1"          "x2.2"          "x2.3"          "x2.4"         
##   [9] "x2.5"          "x2.5_5_TEXT"   "x2.6"          "x2.7"         
##  [13] "x2.7_6_TEXT"   "x2.8"          "x2.9"          "x2.10"        
##  [17] "x2.10_8_TEXT"  "x2.11"         "x2.12"         "x2.12_10_TEXT"
##  [21] "x2.13"         "x2.14"         "x2.14_5_TEXT"  "x2.15"        
##  [25] "x2.16"         "x2.16_7_TEXT"  "x2.17"         "x2.18"        
##  [29] "x2.19.1"       "x2.19.2"       "x2.19.3"       "x2.20"        
##  [33] "x2.21"         "x2.21_9_TEXT"  "x3.1"          "x3.1_1_TEXT"  
##  [37] "x3.2"          "x3.3"          "x3.3_7_TEXT"   "x3.4"         
##  [41] "x3.5"          "x3.6"          "x3.7"          "x3.8"         
##  [45] "x3.9"          "x3.10"         "x3.11"         "x3.12"        
##  [49] "x3.13"         "x3.13_8_TEXT"  "x3.14"         "x3.14_7_TEXT" 
##  [53] "x3.15"         "x3.16"         "x3.17"         "x3.17_7_TEXT" 
##  [57] "x3.18"         "x3.19"         "x3.20"         "x4.1"         
##  [61] "x4.2"          "x4.3"          "x4.4"          "x4.5"         
##  [65] "x4.6"          "x4.7"          "x4.8"          "x4.9"         
##  [69] "x4.10"         "x4.10_1_TEXT"  "x4.11"         "x4.12"        
##  [73] "x4.13"         "x4.14"         "x4.15"         "x4.16"        
##  [77] "x4.17"         "x4.18"         "x4.19"         "x4.20"        
##  [81] "x4.21"         "x4.22"         "x5.23"         "x4.24"        
##  [85] "x4.25"         "x4.26"         "x4.27"         "x4.28"        
##  [89] "x4.29"         "x4.30"         "x5.1"          "x5.2"         
##  [93] "x5.3"          "x5.4"          "x5.5"          "x5.6"         
##  [97] "x5.7"          "x5.8"          "x5.9"          "x5.10"        
## [101] "x5.11"         "x5.12"         "x5.13"         "x5.14"        
## [105] "x5.15"         "x5.16"         "x6.1"          "x6.2"         
## [109] "x6.3"          "x6.4"          "x7.1"          "x7.2"         
## [113] "x7.3"          "x7.4"          "x7.5"          "x7.6"         
## [117] "x7.7"          "x7.8"          "x7.9"          "x7.9_6_TEXT"  
## [121] "x7.10"         "x7.11"         "x7.12"         "x7.13"        
## [125] "x7.14"         "x7.15"         "x7.16"         "x7.17"        
## [129] "x8.1"          "x8.2"          "x8.3"          "x8.4"         
## [133] "x8.5"          "x8.6"          "x8.7"          "x8.8"         
## [137] "x8.9"          "x9.1"          "x9.2"          "x9.3"         
## [141] "x9.4"          "x9.5"          "x9.6"          "x9.7"         
## [145] "x9.8"          "x9.9"
# Print a per-column summary after the type conversions: quartiles and NA
# counts for numeric/date columns, level counts for factors, and length/class
# for character columns. Used to eyeball implausible values before filtering.
summary(data)
##       x1.1             x1.2            x1.3            x1.4           
##  Min.   : 1.000   Min.   :  1.0   Min.   :  1.0   Min.   :0025-03-19  
##  1st Qu.: 2.000   1st Qu.:134.8   1st Qu.: 10.0   1st Qu.:2025-03-19  
##  Median : 3.000   Median :270.5   Median : 19.0   Median :2025-03-19  
##  Mean   : 3.548   Mean   :282.2   Mean   : 19.2   Mean   :2016-09-11  
##  3rd Qu.: 5.000   3rd Qu.:430.2   3rd Qu.: 27.0   3rd Qu.:2025-03-20  
##  Max.   :55.000   Max.   :614.0   Max.   :303.0   Max.   :2025-09-20  
##  NA's   :13       NA's   :20      NA's   :7       NA's   :50          
##       x2.1          x2.2    x2.3         x2.4     x2.5    x2.5_5_TEXT       
##  Min.   :    8.00    :  9    : 65          :314    :305   Length:520        
##  1st Qu.:   16.00   1:340   1:221   7      : 47   1:161   Class :character  
##  Median :   18.00   2:171   2:234   9      : 37   2: 35   Mode  :character  
##  Mean   :   56.03                   10     : 35   3:  2                     
##  3rd Qu.:   20.00                   8      : 31   4: 13                     
##  Max.   :18415.00                   6      : 14   5:  4                     
##  NA's   :31                         (Other): 42                             
##       x2.6     x2.7    x2.7_6_TEXT        x2.8    x2.9        x2.10    
##         :305    : 13   Length:520          : 21    : 15   6      :256  
##  14     : 98   1:445   Class :character   1:200   1:398   7      :111  
##  11     : 74   2: 11   Mode  :character   2: 37   2: 62   5      : 60  
##  7      : 13   3: 38                      3: 13   3: 37   1      : 47  
##  10     :  9   4:  9                      4: 68   4:  7   2      : 23  
##  9      :  7   5:  2                      5:181   5:  1          : 13  
##  (Other): 14   6:  2                                      (Other): 10  
##  x2.10_8_TEXT       x2.11       x2.12     x2.12_10_TEXT      x2.13   x2.14  
##  Length:520          : 15   3      :132   Length:520          : 12    : 17  
##  Class :character   1:  9   1      :128   Class :character   1: 32   1: 61  
##  Mode  :character   2:398   4      :107   Mode  :character   2:452   2:265  
##                     3: 18   2      : 82                      3: 10   3:103  
##                     4: 53   5      : 28                      4:  9   4: 73  
##                     5:  8          : 14                      5:  5   5:  1  
##                     6: 19   (Other): 29                                     
##  x2.14_5_TEXT           x2.15         x2.16     x2.16_7_TEXT       x2.17  
##  Length:520                :150   1      :214   Length:520          : 13  
##  Class :character   5      : 82   2      :208   Class :character   2: 46  
##  Mode  :character   4      : 67   5      : 27   Mode  :character   3:258  
##                     6      : 66          : 21                      4:187  
##                     3      : 45   6      : 18                      5: 16  
##                     7      : 38   3      : 13                             
##                     (Other): 72   (Other): 19                             
##  x2.18   x2.19.1   x2.19.2   x2.19.3   x2.20       x2.21     x2.21_9_TEXT      
##   : 16      : 26      : 55      : 64    : 20   2      :277   Length:520        
##  1:440   1  :206   1  : 32   1  :324   1:465   4      :108   Class :character  
##  2: 24   1,2:  2   2  :174   1,3:  2   2: 19   3      : 64   Mode  :character  
##  3: 32   1,3:  3   2,3:  1   2  : 75   3:  4   1      : 34                     
##  4:  8   2  :141   3  :234   3  : 47   4: 12          : 14                     
##          3  :134   4  : 24   4  :  8           9      : 13                     
##          4  :  8                               (Other): 10                     
##  x3.1     x3.1_1_TEXT         x3.2          x3.3     x3.3_7_TEXT        x3.4   
##   :125   Min.   : 9.00   2      :229          :219   Length:520          :224  
##  1:297   1st Qu.:13.00          :219   1      :211   Class :character   1: 67  
##  2: 98   Median :14.00   1      : 43   5      : 21   Mode  :character   2:144  
##          Mean   :14.03   3      :  9   3      : 20                      3: 55  
##          3rd Qu.:15.00   4      :  9   7      : 15                      4: 30  
##          Max.   :20.00   1,2    :  5   2      : 14                             
##          NA's   :344     (Other):  6   (Other): 20                             
##  x3.5    x3.6    x3.7    x3.8         x3.9       x3.10       x3.11     x3.12  
##   :224    :227    :228    :114   Min.   : 3.00    :218          :303    :150  
##  1:194   1:138   1:123   1:127   1st Qu.:15.00   1: 36   7      : 59   1:270  
##  2:102   2:155   2:120   2:251   Median :17.00   2:231   1      : 44   2:100  
##                  3: 49   3: 28   Mean   :16.32   3: 35   6      : 43          
##                                  3rd Qu.:18.00           3      : 34          
##                                  Max.   :24.00           2      : 18          
##                                  NA's   :415             (Other): 19          
##      x3.13     x3.13_8_TEXT           x3.14     x3.14_7_TEXT       x3.15  
##         :179   Length:520                :281   Length:520          :136  
##  1      : 95   Class :character   1      : 85   Class :character   1:104  
##  5      : 55   Mode  :character   6      : 75   Mode  :character   2:262  
##  7      : 46                      8      : 33                      3: 18  
##  4      : 26                      2      : 14                             
##  3      : 25                      3      : 13                             
##  (Other): 94                      (Other): 19                             
##      x3.16         x3.17     x3.17_7_TEXT       x3.18   x3.19   x3.20   x4.1   
##         :280          :170   Length:520          :127    :143    :126    :252  
##  5      :134   5      : 91   Class :character   1:203   1: 53   1: 41   1: 54  
##  4      : 35   6      : 47   Mode  :character   2:176   2:294   2:310   2:172  
##  8      : 26   3      : 38                      3: 14   3: 30   3: 43   3: 42  
##  1      : 17   8      : 35                                                     
##  2      : 17   2      : 32                                                     
##  (Other): 11   (Other):107                                                     
##  x4.2    x4.3    x4.4    x4.5    x4.6    x4.7    x4.8         x4.9     x4.10  
##   :471    :468    :469    :468    :468    :471    :243          :448    :438  
##  1: 34   1: 30   1: 15   1: 36   1: 35   1: 21   1: 82   16     : 14   1: 23  
##  2: 15   2: 22   2: 36   2: 16   2: 17   2: 28   2:195   18     : 13   2: 59  
##                                                          19     : 10          
##                                                          20     : 10          
##                                                          15     :  7          
##                                                          (Other): 18          
##  x4.10_1_TEXT           x4.11     x4.12        x4.13     x4.14   x4.15  
##  Length:520                :460    :442           :452    :438    :454  
##  Class :character   1      : 28   1: 14   3 months:  6   1: 37   1: 12  
##  Mode  :character   2      : 11   2: 62   1       :  4   2: 40   2: 48  
##                     one    :  5   3:  2   7 months:  3   3:  5   3:  6  
##                     3      :  4           1 month :  2                  
##                     4      :  4           1 yr    :  2                  
##                     (Other):  8           (Other) : 51                  
##  x4.16   x4.17   x4.18   x4.19   x4.20       x4.21     x4.22   x5.23   x4.24  
##   :440    :439    :439    :439    :442          :457    :441    :442    :442  
##  1: 43   1: 52   1: 50   1: 24   1: 52   1      : 33   1: 70   1: 55   1: 58  
##  2: 37   2: 29   2: 31   2: 57   2: 26   4      : 12   2:  9   2:  7   2: 20  
##                                          2      :  7           3:  9          
##                                          3      :  3           4:  1          
##                                          1,2,3  :  2           5:  2          
##                                          (Other):  6           6:  4          
##  x4.25   x4.26   x4.27   x4.28   x4.29   x4.30   x5.1    x5.2    x5.3   
##   :445    :440    :442    :440    :442    :442    :438    :435    :437  
##  1: 53   1: 38   1: 12   1: 13   1:  5   1: 19   1: 61   1: 28   1: 69  
##  2: 22   2: 42   2: 45   2: 67   2: 72   2: 57   2:  7   2: 57   2: 14  
##                  3: 21           3:  1   3:  2   3:  4                  
##                                                  4:  9                  
##                                                  5:  1                  
##                                                                         
##  x5.4    x5.5    x5.6    x5.7    x5.8    x5.9    x5.10   x5.11       x5.12    
##   :436    :434    :432    :436    :436    :435    :439    :436          :437  
##  1: 62   1: 41   1: 76   1: 75   1: 66   1: 81   1:  6   1: 51   2      : 31  
##  2: 22   2: 38   2: 12   2:  9   2: 18   2:  4   2: 23   2: 33   1      : 23  
##          3:  7                                   3: 18           4      : 14  
##                                                  4: 32           3      : 11  
##                                                  5:  2           1,2    :  1  
##                                                                  (Other):  3  
##  x5.13   x5.14   x5.15   x5.16   x6.1     x6.2     x6.3    x6.4    x7.1   
##   :436    :436    :437    :432    : 59      : 86    : 68    : 73    : 41  
##  1: 33   1: 28   1: 17   1: 56   1:197   1  : 89   1:185   1:282   1:409  
##  2: 51   2: 46   2: 41   2: 19   2:251   1,3:  4   2:230   2:165   2: 40  
##          3: 10   3: 25   3: 13   3: 13   2  :317   3: 37           3: 30  
##                                          2,3:  1                          
##                                          3  : 23                          
##                                                                           
##  x7.2    x7.3    x7.4    x7.5    x7.6    x7.7    x7.8         x7.9    
##   : 66    :409    : 48    :302    : 49    : 44    : 42   4      :189  
##  1: 49   1:  4   1:126   1: 10   1:343   1:  6   1:256          :141  
##  2:405   2:  9   2:313   2: 10   2: 42   2: 81   2:155   1      : 47  
##          3: 26   3: 33   3: 46   3: 17   3:167   3: 67   5      : 38  
##          4: 18           4: 62   4: 69   4:119           3      : 31  
##          5: 54           5: 90           5:103           4,5    : 21  
##                                                          (Other): 53  
##  x7.9_6_TEXT        x7.10   x7.11   x7.12   x7.13   x7.14   x7.15   x7.16  
##  Length:520          : 47    : 48    : 42    :220    : 63    : 47    : 56  
##  Class :character   1:270   1: 16   1:  6   1:  3   1:348   1:367   1:334  
##  Mode  :character   2:203   2: 26   2: 33   2: 17   2: 38   2: 49   2: 86  
##                             3:208   3:140   3: 63   3: 71   3: 57   3: 44  
##                             4:222   4:145   4:217                          
##                                     5:  8                                  
##                                     6:146                                  
##  x7.17   x8.1    x8.2    x8.3    x8.4    x8.5    x8.6    x8.7    x8.8   
##   : 63    : 45    : 98    : 30    : 33    : 34    : 31    : 41    : 39  
##  1:190   1:208   1:200   1: 57   1: 54   1: 58   1: 20   1:208   1: 41  
##  2:198   2:217   2:222   2: 59   2:126   2: 94   2: 29   2:177   2:125  
##  3: 69   3: 11           3: 45   3: 70   3: 74   3: 35   3: 62   3: 67  
##          4: 39           4:140   4:116   4:162   4:166   4: 15   4:139  
##                          5:189   5:121   5: 98   5:239   5: 17   5:109  
##                                                                         
##  x8.9    x9.1    x9.2    x9.3    x9.4    x9.5    x9.6    x9.7    x9.8   
##   : 40    : 36    : 37    : 39    : 24    : 25    : 29    : 27    : 28  
##  1: 44   1:171   1:172   1:170   1:117   1:169   1:185   1:189   1:219  
##  2:101   2:260   2:262   2:246   2:315   2:252   2:233   2:239   2:218  
##  3: 63   3: 23   3: 37   3: 39   3: 39   3: 34   3: 40   3: 41   3: 37  
##  4:171   4: 30   4: 12   4: 26   4: 25   4: 40   4: 33   4: 24   4: 18  
##  5:101                                                                  
##                                                                         
##  x9.9   
##   : 25  
##  1:313  
##  2:145  
##  3: 22  
##  4: 15  
##         
## 
# Keep only respondents whose age (x2.1) is between 13 and 20 inclusive.
# Rows with a missing age are dropped as well; the NA test is explicit so the
# result does not depend on how the table class treats NA in a row filter.
age_in_range <- !is.na(data[["x2.1"]]) &
  data[["x2.1"]] >= 13 & data[["x2.1"]] <= 20
data <- data[age_in_range, ]

# Remove rows where gender (x2.2) is NA or an empty string
has_gender <- !is.na(data[["x2.2"]]) & data[["x2.2"]] != ""
data <- data[has_gender, ]

#CHECK FOR MALE CONDITIONALS
# x2.2 == 2 marks male respondents; no NA remains in x2.2 after the filter above.
is_male <- data[["x2.2"]] == 2

# change pregnant (x4.8) to NA if a male indicated Yes (1)
data[["x4.8"]][is_male & data[["x4.8"]] %in% "1"] <- NA

# change menstrual (x3.1) to NA if a male answered 1 or 2
data[["x3.1"]][is_male & data[["x3.1"]] %in% c("1", "2")] <- NA

# change menstrual age (x3.1_1_TEXT) to NA for males. The column is numeric,
# so "has a value" means "is not NA"; blanking every male row is equivalent.
data[["x3.1_1_TEXT"]][is_male] <- NA

# change menstrual product / follow-up answers (x3.2 - x3.7) to NA for males.
# Only non-blank answers are replaced; blank ("") answers are left as they are.
for (menstrual_col in c("x3.2", "x3.3", "x3.4", "x3.5", "x3.6", "x3.7")) {
  answers <- data[[menstrual_col]]
  answered <- !is.na(answers) & answers != ""
  data[[menstrual_col]][is_male & answered] <- NA
}

#CHECK FOR SEX ACTIVITY CONDITIONALS
# x3.9 is only valid when x3.8 was answered 1. The previous condition tested
# gender (x2.2 != 1) instead of x3.8, which blanked x3.9 for every male
# respondent and did not match the verification query (x3.8 != 1) run
# afterwards. %in% keeps the mask free of NA.
# NOTE(review): confirm against the questionnaire that x3.9 follows x3.8 == 1.
x3_8_not_yes <- !(data[["x3.8"]] %in% "1")
data[["x3.9"]][x3_8_not_yes] <- NA
# Query to see duplicate village numbers: list every x1.2 value that occurs on
# more than one row, together with how many rows share it. Missing x1.2
# values are grouped together and reported as a single NA row.
sqldf("SELECT [x1.2], COUNT(*) as count 
       FROM data 
       GROUP BY [x1.2] 
       HAVING COUNT(*) > 1")
##    x1.2 count
## 1    NA    18
## 2     1     2
## 3    17     2
## 4    30     3
## 5   202     2
## 6   205     2
## 7   207     2
## 8   208     2
## 9   209     2
## 10  210     2
## 11  211     2
## 12  265     2
## 13  270     2
## 14  271     2
## 15  436     2
## 16  444     2
## 17  451     2
## 18  456     2
## 19  543     3
## 20  607     2
# Verification queries: each one should return zero rows once the cleaning
# step has run.

# Check: no male respondent (x2.2 = 2) is still recorded as pregnant (x4.8 = 1).
sqldf("SELECT [x2.2], [x4.8] 
      FROM data 
      WHERE [x2.2] = 2 AND [x4.8] = 1")
## [1] x2.2 x4.8
## <0 rows> (or 0-length row.names)
# Check: no male respondent still has a menstruation answer (x3.1 = 1 or 2).
sqldf("SELECT [x2.2], [x3.1] 
      FROM data 
      WHERE [x2.2] = 2 AND ([x3.1] = 1 OR [x3.1] = 2)")
## [1] x2.2 x3.1
## <0 rows> (or 0-length row.names)
# Check: no male respondent still has a menstruation age (x3.1_1_TEXT).
sqldf("SELECT [x2.2], [x3.1_1_TEXT]
       FROM data 
       WHERE [x2.2] = 2 AND [x3.1_1_TEXT] != ''")
## [1] x2.2        x3.1_1_TEXT
## <0 rows> (or 0-length row.names)
# Check: no male respondent still has a non-blank x3.2 answer.
sqldf("SELECT [x2.2], [x3.2]
       FROM data 
       WHERE [x2.2] = 2 AND [x3.2] != ''")
## [1] x2.2 x3.2
## <0 rows> (or 0-length row.names)
# Same check for the 3.3 - 3.7 group. Only x3.7 is actually queried here;
# x3.3, x3.4, x3.5 and x3.6 are not covered by this statement.
sqldf("SELECT [x2.2], [x3.7]
       FROM data 
       WHERE [x2.2] = 2 AND [x3.7] != ''")
## [1] x2.2 x3.7
## <0 rows> (or 0-length row.names)
# Check for the sex conditionals: no row has an x3.9 value while x3.8 is
# anything other than 1.
sqldf("SELECT [x3.8], [x3.9]
       FROM data 
       WHERE [x3.8] != 1 AND [x3.9] != ''")
## [1] x3.8 x3.9
## <0 rows> (or 0-length row.names)

Make Codebook

# Make the variable codebook from the cleaned data and keep the result in
# codebook_output.
# NOTE(review): this runs before the variable labels are attached to the
# columns further down, so the codebook presumably does not include them, and
# codebook_output is not referenced again in the visible code - confirm whether
# it should be generated after labelling and/or be the object that is saved.
codebook_output <- codebook(data)
## Warning: There was 1 warning in `dplyr::summarize()`.
## ℹ In argument: `dplyr::across(tidyselect::any_of(variable_names),
##   mangled_skimmers$funs)`.
## ℹ In group 0: .
## Caused by warning:
## ! There were 126 warnings in `dplyr::summarize()`.
## The first warning was:
## ℹ In argument: `dplyr::across(tidyselect::any_of(variable_names),
##   mangled_skimmers$funs)`.
## Caused by warning in `sorted_count()`:
## ! Variable contains value(s) of "" that have been converted to "empty".
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 125 remaining warnings.
## There was 1 warning in `dplyr::summarize()`.
## ℹ In argument: `dplyr::across(tidyselect::any_of(variable_names),
##   mangled_skimmers$funs)`.
## ℹ In group 0: .
## Caused by warning:
## ! There were 126 warnings in `dplyr::summarize()`.
## The first warning was:
## ℹ In argument: `dplyr::across(tidyselect::any_of(variable_names),
##   mangled_skimmers$funs)`.
## Caused by warning in `sorted_count()`:
## ! Variable contains value(s) of "" that have been converted to "empty".
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 125 remaining warnings.
# make label variables
# variable_labels holds one label per column, captured in column order before
# the header rows were dropped; columns have not been added or removed since,
# so position i in variable_labels belongs to column i.
for (i in seq_along(data)) {
  var_label(data[[i]]) <- variable_labels[i]
}
#test for success
#var_label(data)

# Turn x2.2 (gender) into a labelled integer.
# x2.2 is a factor, and as.integer() on a factor returns the position of each
# level, not the recorded answer. Because the unused "" level sorts first,
# answers "1" and "2" came out as 2 and 3 and no longer matched the value
# labels. Going through as.character() recovers the recorded codes 1 and 2.
gender_codes <- as.integer(as.character(data$x2.2))
data$x2.2 <- labelled(
  gender_codes,
  # The cleaning step treats x2.2 == 2 as male, so the previous "Yes"/"No"
  # value labels did not describe this variable.
  # NOTE(review): confirm the exact wording for code 1 against the questionnaire.
  labels = c("Female" = 1L, "Male" = 2L),
  # labelled() builds a new vector; pass the variable label through so the one
  # assigned in the loop above is not lost.
  label = var_label(data$x2.2)
)
#summary(data)

# confirm the remaining ages are within 13-20
summary(data$x2.1)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   13.00   15.00   18.00   17.12   19.00   20.00

Export clean data and codebook

# write new csv of clean data
# The file name is relative, so it is created in the current working directory.
fwrite(data, "kibera_values_cleaned.csv")

# write codebook
# NOTE(review): the object serialised here is `data` (the cleaned, labelled
# table), not `codebook_output`. Confirm which one readers of codebook.rds
# expect before changing either the object or the file name.
saveRDS(data, file = "codebook.rds")